Classification of Handwritten Digits
Anna Androvitsanea
aandrovitsanea@aueb.gr
In this project I present a program that classifies handwritten digits.
The programm is written in Python and consists of the following steps.
Introduction
Task 1
Task 2
Task 3
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
%matplotlib inline
from matplotlib import pyplot as plt
from scipy.linalg import svd
import numpy as np
from sklearn.datasets import load_digits
from matplotlib import pyplot as plt
from sklearn.decomposition import TruncatedSVD
float_formatter = lambda x: "%.2f" % x
np.set_printoptions(formatter={'float_kind':float_formatter})
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from tabulate import tabulate
#! pip install progressbar2
import progressbar
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.model_selection import train_test_split
# import data
azip = pd.read_excel('data.xlsx', 'azip',
header = None) # vector that holds the digits (the number)
dzip = pd.read_excel('data.xlsx', 'dzip',
header = None) # 256 x 1707 array that holds the training images
testzip = pd.read_excel('data.xlsx', 'testzip',
header = None) # test data
dtest = pd.read_excel('data.xlsx', 'dtest',
header = None) # test data
# rename and tranform to array
labels_train = dzip.to_numpy()
train = azip.to_numpy()
labels_test = dtest.to_numpy()
test = testzip.to_numpy()
# check shapes of data
labels_train.shape, train.shape, labels_test.shape, test.shape
((1, 1707), (256, 1707), (1, 2007), (256, 2007))
# merge train and test set to prepare
# for train/test splitting
X = np.concatenate((train, test), axis = 1)
y = np.concatenate((labels_train.reshape(1707),
labels_test.reshape(2007)))
### ===== train/test splitting ===== ###
### ------------------------------------ ###
idx1 = int(round(3714*0.8, 0))
idx2 = idx1 + 1
X_train, X_test = X[:,:idx1], X[:,idx2:]
y_train, y_test = y[:idx1], y[idx2:]
X_train.shape, X_test.shape, y_train.shape, y_test.shape
((256, 2971), (256, 742), (2971,), (742,))
# check dimensions
print("Size of training data are:",
y_train.shape, X_train.shape)
print("Size of testing data are:",
y_test.shape, X_test.shape)
Size of training data are: (2971,) (256, 2971) Size of testing data are: (742,) (256, 742)
# check training data
# check frequency per digit
unique, counts = np.unique(y_train, return_counts=True)
counts = np.asarray((unique, counts))
for digit in counts[0]:
print(counts[0][digit], counts[1][digit])
0 567 1 402 2 328 3 236 4 235 5 166 6 269 7 260 8 260 9 248
# check test data
# check frequency per digit
unique_test, counts_test = np.unique(y_test, return_counts=True)
counts_test = np.asarray((unique_test, counts_test))
for digit in counts_test[0]:
print(counts_test[0][digit], counts_test[1][digit])
0 111 1 114 2 72 3 61 4 87 5 82 6 51 7 53 8 50 9 61
# plot random digit from train
image = 1
fig = plt.figure(figsize=(4,2))
plt.imshow(X_train[:, image].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
print("This digit represents number %d." %y_train[image])
plt.show()
This digit represents number 5.
# plot random digit from test
fig = plt.figure(figsize=(4,2))
plt.imshow(X_test[:, image].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
print("This digit represents number %d." %y_test[image])
plt.show()
This digit represents number 8.
For each digit I will calculate the SVD, based on the training data.
Specifically, the singular value decomposition of the m × n ${\displaystyle m\times n} m\times n$ complex matrix M will be calculated as the factorization of the form $${\displaystyle \mathbf {U\Sigma V^{*}}}$$
# Calculate the SVD for each digit
SVD = {d:{} for d in range(10)} # store the SVD for each digit, d
for d in range(10):
# index train dataset per digit
SVD[d] = np.linalg.svd(X_train[:, np.where(y_train == d)[0]],
full_matrices=True)
For the given number of basis vectors, n = 5 - 20, I extract the first n columns of the U matrix of the calculated SVD.
I will then calculate the residuals based on the relation $||(I - U_kU_k^T)z ||$ for each digit.
The minimum residual per digit for each test image will yield the digit that the image represents.
clas_rep = {}
cm = {}
accuracy = {}
# n is the number of basis vectors
for n in progressbar.progressbar(range(5, 21)):
print("Calculating for %d basis vectors." %n)
# calculate the U_k @ U_k.T once and store it
UUT = {}
for d in range(10): # for each digit
UUT[d] = SVD[d][0][:, :n] @ SVD[d][0][:, :n].T
# store results for residuals and predictions
res = np.zeros(10)
classify = np.zeros(X_test.shape[1])
bad = np.zeros(X_test.shape[1])
for image in range(X_test.shape[1]): # iterate over the vectors of test images (2007)
for d in range(10): # iterate over the number of digits (0-9)
res[d] = np.linalg.norm((np.identity(X_test.shape[0]) - UUT[d])@X_test[:, image])
# for each image extract the index
# for the minimum residual
# and classify
classify[image] = np.argmin(res)
bad[image] = np.argmax(res)
# for each basis vector n
# store the classification report, ie. the F1 and recall
clas_rep[n] = classification_report(y_test, classify, output_dict = True)
# store confusion matrix
cm[n] = confusion_matrix(y_test, classify)
# store accuracy
accuracy[n] = cm[n].trace() / X_test.shape[1]
N/A% (0 of 16) | | Elapsed Time: 0:00:00 ETA: --:--:--
Calculating for 5 basis vectors.
6% (1 of 16) |# | Elapsed Time: 0:00:01 ETA: 0:00:25
Calculating for 6 basis vectors.
12% (2 of 16) |### | Elapsed Time: 0:00:03 ETA: 0:00:23
Calculating for 7 basis vectors.
18% (3 of 16) |#### | Elapsed Time: 0:00:05 ETA: 0:00:26
Calculating for 8 basis vectors.
25% (4 of 16) |###### | Elapsed Time: 0:00:06 ETA: 0:00:21
Calculating for 9 basis vectors.
31% (5 of 16) |####### | Elapsed Time: 0:00:09 ETA: 0:00:23
Calculating for 10 basis vectors.
37% (6 of 16) |######### | Elapsed Time: 0:00:12 ETA: 0:00:36
Calculating for 11 basis vectors.
43% (7 of 16) |########## | Elapsed Time: 0:00:17 ETA: 0:00:37
Calculating for 12 basis vectors.
50% (8 of 16) |############ | Elapsed Time: 0:00:19 ETA: 0:00:18
Calculating for 13 basis vectors.
56% (9 of 16) |############## | Elapsed Time: 0:00:21 ETA: 0:00:14
Calculating for 14 basis vectors.
62% (10 of 16) |############### | Elapsed Time: 0:00:22 ETA: 0:00:10
Calculating for 15 basis vectors.
68% (11 of 16) |################ | Elapsed Time: 0:00:24 ETA: 0:00:08
Calculating for 16 basis vectors.
75% (12 of 16) |################## | Elapsed Time: 0:00:26 ETA: 0:00:06
Calculating for 17 basis vectors.
81% (13 of 16) |################### | Elapsed Time: 0:00:28 ETA: 0:00:05
Calculating for 18 basis vectors.
87% (14 of 16) |##################### | Elapsed Time: 0:00:29 ETA: 0:00:03
Calculating for 19 basis vectors.
93% (15 of 16) |###################### | Elapsed Time: 0:00:31 ETA: 0:00:01
Calculating for 20 basis vectors.
100% (16 of 16) |########################| Elapsed Time: 0:00:32 Time: 0:00:32
# Print the names of the columns.
print ("{:<10} {:<10}".format('Basis vec.', 'Accuracy (%)'))
# print each data item.
for key in accuracy.keys():
print ("{:<10} {:.2f}".format(key, round(accuracy[key] * 100, 2)))
Basis vec. Accuracy (%) 5 92.05 6 92.99 7 92.32 8 92.72 9 93.40 10 93.80 11 94.47 12 94.47 13 94.61 14 94.61 15 95.01 16 94.88 17 94.34 18 94.61 19 94.88 20 94.74
# plot number of basis vectors and accuracy scores
# plot the highest accuracy
plt.figure(figsize=(10,8)) # set the size of figure
# plot accuracy for each number of basis vectors
plt.scatter(accuracy.keys() , accuracy.values(), c= 'm')
# Plot the maximum accuracy of the curve
# list of keys and values
keys = list(accuracy.keys())
values = list(accuracy.values())
plt.scatter(keys[values.index(max(values))], max(values),
c = 'c')
plt.text(17, 0.936,
'Accuracy = %.3f \n n basis vector = %d' % ( max(values),
keys[values.index(max(values))]), fontsize=12)
plt.title('Accuracy in % for the numbers of basis vectors (5, 20)',
fontsize=17) # add a title
plt.xlabel('Number of basis vectors', fontsize=14) # add x label
plt.ylabel('Accuracy (%)', fontsize=14) # add y label
plt.legend(fontsize = 14)
plt.show()
No handles with labels found to put in legend.
# view the confusion matrix
for n in range(5, 21):
print("Confusion matrix for %d basis vectors" %n)
print("---------------------------------------------------")
print(pd.DataFrame(cm[n],
index = [i for i in range(0, 10)],
columns = [i for i in range(0, 10)]))
print("---------------------------------------------------")
Confusion matrix for 5 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 65 0 2 1 2 0 0 0
3 1 0 1 55 0 2 0 0 1 1
4 2 0 2 0 72 0 2 3 1 5
5 3 0 0 6 0 72 0 0 1 0
6 1 0 0 0 1 0 49 0 0 0
7 0 0 0 1 0 0 0 49 0 3
8 0 0 1 4 1 1 0 1 41 1
9 0 0 0 0 1 0 0 1 1 58
---------------------------------------------------
Confusion matrix for 6 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 0 1 68 0 2 0 1 0 0 0
3 2 0 1 54 0 2 0 0 1 1
4 0 0 1 0 76 0 1 5 1 3
5 1 1 0 6 0 72 2 0 0 0
6 1 0 0 0 1 0 49 0 0 0
7 0 0 0 1 0 0 0 52 0 0
8 1 0 1 3 1 1 0 2 40 1
9 0 0 0 0 0 0 0 2 2 57
---------------------------------------------------
Confusion matrix for 7 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 2 65 0 2 0 1 1 0 0
3 2 0 1 54 0 2 0 0 1 1
4 2 0 1 0 76 0 1 5 0 2
5 2 1 0 5 0 73 0 1 0 0
6 0 2 0 0 1 1 46 0 1 0
7 0 0 0 1 0 0 0 52 0 0
8 2 0 1 1 1 1 1 2 40 1
9 0 0 0 0 1 0 0 1 2 57
---------------------------------------------------
Confusion matrix for 8 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 2 65 1 2 0 1 0 0 0
3 2 0 1 53 0 2 0 0 2 1
4 1 0 1 0 78 0 2 2 0 3
5 1 1 0 6 0 73 0 1 0 0
6 1 2 0 0 0 1 47 0 0 0
7 0 0 0 1 0 0 0 52 0 0
8 2 0 1 1 1 2 0 1 41 1
9 0 0 0 0 0 0 0 3 1 57
---------------------------------------------------
Confusion matrix for 9 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 2 65 1 1 0 1 0 0 1
3 1 0 1 54 0 2 0 0 2 1
4 1 0 0 0 77 0 2 1 1 5
5 0 1 0 5 0 76 0 0 0 0
6 1 1 0 0 0 1 48 0 0 0
7 0 0 0 1 0 0 0 52 0 0
8 2 0 2 1 1 1 0 1 41 1
9 0 0 0 0 0 0 0 3 0 58
---------------------------------------------------
Confusion matrix for 10 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 66 1 1 0 1 1 0 0
3 1 0 1 55 1 3 0 0 0 0
4 1 0 0 0 77 0 2 1 1 5
5 1 1 1 4 0 75 0 0 0 0
6 1 1 0 0 0 0 49 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 2 0 2 2 0 0 0 1 42 1
9 0 0 0 0 0 0 0 3 0 58
---------------------------------------------------
Confusion matrix for 11 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 68 0 2 0 0 0 0 0
3 1 0 1 55 1 3 0 0 0 0
4 0 0 0 0 81 0 2 1 0 3
5 1 1 1 5 0 74 0 0 0 0
6 0 1 0 0 1 0 49 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 2 0 1 1 0 1 0 0 43 2
9 0 0 0 0 2 1 0 0 1 57
---------------------------------------------------
Confusion matrix for 12 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 68 0 1 0 0 1 0 0
3 0 0 1 55 1 4 0 0 0 0
4 0 0 0 0 80 0 2 1 0 4
5 1 1 1 5 0 74 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 2 0 1 1 0 1 0 0 43 2
9 0 0 0 0 2 1 0 0 1 57
---------------------------------------------------
Confusion matrix for 13 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 110 0 0 0 0 0 1 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 68 0 1 0 0 0 1 0
3 0 0 1 56 1 3 0 0 0 0
4 0 0 0 0 80 0 2 0 0 5
5 2 1 1 4 0 74 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 0 1 0 2 0 0 44 2
9 0 0 0 0 2 0 0 1 1 57
---------------------------------------------------
Confusion matrix for 14 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 67 1 1 0 0 1 0 0
3 0 0 1 56 1 3 0 0 0 0
4 0 0 0 0 81 0 1 1 0 4
5 2 1 0 5 0 74 0 0 0 0
6 0 1 0 0 1 0 49 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 1 1 0 1 0 0 44 2
9 0 0 0 0 3 1 0 0 0 57
---------------------------------------------------
Confusion matrix for 15 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 111 0 0 3 0 0 0 0 0
2 1 1 69 0 1 0 0 0 0 0
3 0 0 1 55 1 4 0 0 0 0
4 0 0 0 0 82 0 0 2 0 3
5 1 1 0 5 0 75 0 0 0 0
6 0 1 0 0 1 0 49 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 1 1 0 1 0 0 44 2
9 0 0 0 1 2 0 0 1 0 57
---------------------------------------------------
Confusion matrix for 16 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 110 0 0 3 0 1 0 0 0
2 1 1 67 0 2 1 0 0 0 0
3 0 0 1 56 0 4 0 0 0 0
4 0 0 0 0 82 0 0 2 0 3
5 1 1 0 4 0 76 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 1 1 0 1 0 1 43 2
9 0 0 0 0 2 1 0 1 0 57
---------------------------------------------------
Confusion matrix for 17 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 110 0 0 3 0 1 0 0 0
2 1 1 67 0 2 1 0 0 0 0
3 1 0 1 55 1 3 0 0 0 0
4 0 0 0 0 81 0 0 2 0 4
5 1 1 0 6 0 74 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 1 1 0 1 0 1 43 2
9 0 0 0 0 1 0 0 2 1 57
---------------------------------------------------
Confusion matrix for 18 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 110 0 0 3 0 1 0 0 0
2 1 0 69 0 2 0 0 0 0 0
3 1 0 1 54 1 4 0 0 0 0
4 0 0 0 0 81 0 0 2 0 4
5 1 1 0 5 0 75 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 1 1 0 1 0 1 43 2
9 0 0 0 0 1 0 0 2 1 57
---------------------------------------------------
Confusion matrix for 19 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 110 0 0 3 0 1 0 0 0
2 2 0 68 0 2 0 0 0 0 0
3 1 0 1 54 1 4 0 0 0 0
4 0 0 0 0 84 0 0 2 0 1
5 1 1 0 6 0 74 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 1 1 0 1 0 1 44 1
9 0 0 0 0 1 0 0 2 1 57
---------------------------------------------------
Confusion matrix for 20 basis vectors
---------------------------------------------------
0 1 2 3 4 5 6 7 8 9
0 111 0 0 0 0 0 0 0 0 0
1 0 110 0 0 3 0 1 0 0 0
2 2 0 67 0 2 0 0 0 1 0
3 0 0 1 54 1 4 0 0 1 0
4 0 0 0 0 83 0 0 2 0 2
5 1 1 0 5 0 75 0 0 0 0
6 0 1 0 0 0 0 50 0 0 0
7 0 0 1 0 0 0 0 52 0 0
8 1 0 2 0 0 1 0 1 44 1
9 0 0 0 0 1 0 0 2 1 57
---------------------------------------------------
for n in range(5, 21):
print(" Classification report for %d basis vectors" %n)
print("-------------------------------------------------------")
print(pd.DataFrame(clas_rep[n]).transpose())
print("-------------------------------------------------------")
Classification report for 5 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.932773 1.000000 0.965217 111.000000
1 0.991071 0.973684 0.982301 114.000000
2 0.942029 0.902778 0.921986 72.000000
3 0.833333 0.901639 0.866142 61.000000
4 0.900000 0.827586 0.862275 87.000000
5 0.947368 0.878049 0.911392 82.000000
6 0.924528 0.960784 0.942308 51.000000
7 0.907407 0.924528 0.915888 53.000000
8 0.911111 0.820000 0.863158 50.000000
9 0.852941 0.950820 0.899225 61.000000
accuracy 0.920485 0.920485 0.920485 0.920485
macro avg 0.914256 0.913987 0.912989 742.000000
weighted avg 0.921822 0.920485 0.920083 742.000000
-------------------------------------------------------
Classification report for 6 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.956897 1.000000 0.977974 111.000000
1 0.982301 0.973684 0.977974 114.000000
2 0.957746 0.944444 0.951049 72.000000
3 0.843750 0.885246 0.864000 61.000000
4 0.915663 0.873563 0.894118 87.000000
5 0.960000 0.878049 0.917197 82.000000
6 0.924528 0.960784 0.942308 51.000000
7 0.852459 0.981132 0.912281 53.000000
8 0.909091 0.800000 0.851064 50.000000
9 0.919355 0.934426 0.926829 61.000000
accuracy 0.929919 0.929919 0.929919 0.929919
macro avg 0.922179 0.923133 0.921479 742.000000
weighted avg 0.931096 0.929919 0.929542 742.000000
-------------------------------------------------------
Classification report for 7 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.925000 1.000000 0.961039 111.000000
1 0.956897 0.973684 0.965217 114.000000
2 0.955882 0.902778 0.928571 72.000000
3 0.885246 0.885246 0.885246 61.000000
4 0.904762 0.873563 0.888889 87.000000
5 0.948052 0.890244 0.918239 82.000000
6 0.938776 0.901961 0.920000 51.000000
7 0.838710 0.981132 0.904348 53.000000
8 0.909091 0.800000 0.851064 50.000000
9 0.934426 0.934426 0.934426 61.000000
accuracy 0.923181 0.923181 0.923181 0.923181
macro avg 0.919684 0.914303 0.915704 742.000000
weighted avg 0.924290 0.923181 0.922641 742.000000
-------------------------------------------------------
Classification report for 8 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.932773 1.000000 0.965217 111.000000
1 0.956897 0.973684 0.965217 114.000000
2 0.955882 0.902778 0.928571 72.000000
3 0.854839 0.868852 0.861789 61.000000
4 0.928571 0.896552 0.912281 87.000000
5 0.935897 0.890244 0.912500 82.000000
6 0.940000 0.921569 0.930693 51.000000
7 0.881356 0.981132 0.928571 53.000000
8 0.931818 0.820000 0.872340 50.000000
9 0.919355 0.934426 0.926829 61.000000
accuracy 0.927224 0.927224 0.927224 0.927224
macro avg 0.923739 0.918924 0.920401 742.000000
weighted avg 0.927824 0.927224 0.926721 742.000000
-------------------------------------------------------
Classification report for 9 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.948718 1.000000 0.973684 111.000000
1 0.965217 0.973684 0.969432 114.000000
2 0.955882 0.902778 0.928571 72.000000
3 0.870968 0.885246 0.878049 61.000000
4 0.939024 0.885057 0.911243 87.000000
5 0.950000 0.926829 0.938272 82.000000
6 0.941176 0.941176 0.941176 51.000000
7 0.912281 0.981132 0.945455 53.000000
8 0.931818 0.820000 0.872340 50.000000
9 0.878788 0.950820 0.913386 61.000000
accuracy 0.933962 0.933962 0.933962 0.933962
macro avg 0.929387 0.926672 0.927161 742.000000
weighted avg 0.934553 0.933962 0.933519 742.000000
-------------------------------------------------------
Classification report for 10 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.940678 1.000000 0.969432 111.000000
1 0.973684 0.973684 0.973684 114.000000
2 0.929577 0.916667 0.923077 72.000000
3 0.887097 0.901639 0.894309 61.000000
4 0.939024 0.885057 0.911243 87.000000
5 0.961538 0.914634 0.937500 82.000000
6 0.942308 0.960784 0.951456 51.000000
7 0.896552 0.981132 0.936937 53.000000
8 0.976744 0.840000 0.903226 50.000000
9 0.906250 0.950820 0.928000 61.000000
accuracy 0.938005 0.938005 0.938005 0.938005
macro avg 0.935345 0.932442 0.932886 742.000000
weighted avg 0.938939 0.938005 0.937636 742.000000
-------------------------------------------------------
Classification report for 11 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.956897 1.000000 0.977974 111.000000
1 0.973684 0.973684 0.973684 114.000000
2 0.944444 0.944444 0.944444 72.000000
3 0.901639 0.901639 0.901639 61.000000
4 0.900000 0.931034 0.915254 87.000000
5 0.936709 0.902439 0.919255 82.000000
6 0.960784 0.960784 0.960784 51.000000
7 0.981132 0.981132 0.981132 53.000000
8 0.977273 0.860000 0.914894 50.000000
9 0.919355 0.934426 0.926829 61.000000
accuracy 0.944744 0.944744 0.944744 0.944744
macro avg 0.945192 0.938958 0.941589 742.000000
weighted avg 0.945108 0.944744 0.944531 742.000000
-------------------------------------------------------
Classification report for 12 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.965217 1.000000 0.982301 111.000000
1 0.973684 0.973684 0.973684 114.000000
2 0.944444 0.944444 0.944444 72.000000
3 0.901639 0.901639 0.901639 61.000000
4 0.919540 0.919540 0.919540 87.000000
5 0.925000 0.902439 0.913580 82.000000
6 0.961538 0.980392 0.970874 51.000000
7 0.962963 0.981132 0.971963 53.000000
8 0.977273 0.860000 0.914894 50.000000
9 0.904762 0.934426 0.919355 61.000000
accuracy 0.944744 0.944744 0.944744 0.944744
macro avg 0.943606 0.939770 0.941227 742.000000
weighted avg 0.944904 0.944744 0.944478 742.000000
-------------------------------------------------------
Classification report for 13 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.964912 0.990991 0.977778 111.000000
1 0.973684 0.973684 0.973684 114.000000
2 0.957746 0.944444 0.951049 72.000000
3 0.918033 0.918033 0.918033 61.000000
4 0.919540 0.919540 0.919540 87.000000
5 0.936709 0.902439 0.919255 82.000000
6 0.943396 0.980392 0.961538 51.000000
7 0.981132 0.981132 0.981132 53.000000
8 0.956522 0.880000 0.916667 50.000000
9 0.890625 0.934426 0.912000 61.000000
accuracy 0.946092 0.946092 0.946092 0.946092
macro avg 0.944230 0.942508 0.943068 742.000000
weighted avg 0.946281 0.946092 0.945945 742.000000
-------------------------------------------------------
Classification report for 14 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.965217 1.000000 0.982301 111.000000
1 0.973684 0.973684 0.973684 114.000000
2 0.957143 0.930556 0.943662 72.000000
3 0.888889 0.918033 0.903226 61.000000
4 0.900000 0.931034 0.915254 87.000000
5 0.936709 0.902439 0.919255 82.000000
6 0.980000 0.960784 0.970297 51.000000
7 0.962963 0.981132 0.971963 53.000000
8 1.000000 0.880000 0.936170 50.000000
9 0.904762 0.934426 0.919355 61.000000
accuracy 0.946092 0.946092 0.946092 0.946092
macro avg 0.946937 0.941209 0.943517 742.000000
weighted avg 0.946891 0.946092 0.946051 742.000000
-------------------------------------------------------
Classification report for 15 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.973684 1.000000 0.986667 111.000000
1 0.973684 0.973684 0.973684 114.000000
2 0.958333 0.958333 0.958333 72.000000
3 0.887097 0.901639 0.894309 61.000000
4 0.911111 0.942529 0.926554 87.000000
5 0.937500 0.914634 0.925926 82.000000
6 1.000000 0.960784 0.980000 51.000000
7 0.945455 0.981132 0.962963 53.000000
8 1.000000 0.880000 0.936170 50.000000
9 0.919355 0.934426 0.926829 61.000000
accuracy 0.950135 0.950135 0.950135 0.950135
macro avg 0.950622 0.944716 0.947144 742.000000
weighted avg 0.950840 0.950135 0.950096 742.000000
-------------------------------------------------------
Classification report for 16 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.973684 1.000000 0.986667 111.000000
1 0.973451 0.964912 0.969163 114.000000
2 0.957143 0.930556 0.943662 72.000000
3 0.918033 0.918033 0.918033 61.000000
4 0.921348 0.942529 0.931818 87.000000
5 0.915663 0.926829 0.921212 82.000000
6 0.980392 0.980392 0.980392 51.000000
7 0.928571 0.981132 0.954128 53.000000
8 1.000000 0.860000 0.924731 50.000000
9 0.919355 0.934426 0.926829 61.000000
accuracy 0.948787 0.948787 0.948787 0.948787
macro avg 0.948764 0.943881 0.945664 742.000000
weighted avg 0.949465 0.948787 0.948649 742.000000
-------------------------------------------------------
Classification report for 17 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.965217 1.000000 0.982301 111.000000
1 0.973451 0.964912 0.969163 114.000000
2 0.957143 0.930556 0.943662 72.000000
3 0.887097 0.901639 0.894309 61.000000
4 0.920455 0.931034 0.925714 87.000000
5 0.936709 0.902439 0.919255 82.000000
6 0.980392 0.980392 0.980392 51.000000
7 0.912281 0.981132 0.945455 53.000000
8 0.977273 0.860000 0.914894 50.000000
9 0.904762 0.934426 0.919355 61.000000
accuracy 0.943396 0.943396 0.943396 0.943396
macro avg 0.941478 0.938653 0.939450 742.000000
weighted avg 0.943982 0.943396 0.943217 742.000000
-------------------------------------------------------
Classification report for 18 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.965217 1.000000 0.982301 111.000000
1 0.982143 0.964912 0.973451 114.000000
2 0.958333 0.958333 0.958333 72.000000
3 0.900000 0.885246 0.892562 61.000000
4 0.920455 0.931034 0.925714 87.000000
5 0.937500 0.914634 0.925926 82.000000
6 0.980392 0.980392 0.980392 51.000000
7 0.912281 0.981132 0.945455 53.000000
8 0.977273 0.860000 0.914894 50.000000
9 0.904762 0.934426 0.919355 61.000000
accuracy 0.946092 0.946092 0.946092 0.946092
macro avg 0.943836 0.941011 0.941838 742.000000
weighted avg 0.946581 0.946092 0.945893 742.000000
-------------------------------------------------------
Classification report for 19 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.956897 1.000000 0.977974 111.000000
1 0.982143 0.964912 0.973451 114.000000
2 0.957746 0.944444 0.951049 72.000000
3 0.885246 0.885246 0.885246 61.000000
4 0.923077 0.965517 0.943820 87.000000
5 0.936709 0.902439 0.919255 82.000000
6 0.980392 0.980392 0.980392 51.000000
7 0.912281 0.981132 0.945455 53.000000
8 0.977778 0.880000 0.926316 50.000000
9 0.966102 0.934426 0.950000 61.000000
accuracy 0.948787 0.948787 0.948787 0.948787
macro avg 0.947837 0.943851 0.945296 742.000000
weighted avg 0.949363 0.948787 0.948612 742.000000
-------------------------------------------------------
Classification report for 20 basis vectors
-------------------------------------------------------
precision recall f1-score support
0 0.965217 1.000000 0.982301 111.000000
1 0.982143 0.964912 0.973451 114.000000
2 0.943662 0.930556 0.937063 72.000000
3 0.915254 0.885246 0.900000 61.000000
4 0.922222 0.954023 0.937853 87.000000
5 0.937500 0.914634 0.925926 82.000000
6 0.980392 0.980392 0.980392 51.000000
7 0.912281 0.981132 0.945455 53.000000
8 0.936170 0.880000 0.907216 50.000000
9 0.950000 0.934426 0.942149 61.000000
accuracy 0.947439 0.947439 0.947439 0.947439
macro avg 0.944484 0.942532 0.943181 742.000000
weighted avg 0.947568 0.947439 0.947220 742.000000
-------------------------------------------------------
For the number of basis vectors that provide the best accuracy, aka n = 18 I will find the relevant scores per digit.
# select the confusion matrix for the optimum
# number of basis vectors
opt_cm = pd.DataFrame(cm[keys[values.index(max(values))]],
index = [i for i in range(0, 10)],
columns = [i for i in range(0, 10)])
# find best scores
# per digit
# correctly identified digits/all digits
opt_res = []
for pos in range(opt_cm.shape[0]):
accu = opt_cm.loc[pos, pos] / opt_cm.loc[pos,:].sum()
opt_res.append(round(100 * accu, 2))
pd.DataFrame(opt_res,
index = [i for i in range(0,10)],
columns = ['accuracy'])
| accuracy | |
|---|---|
| 0 | 100.00 |
| 1 | 97.37 |
| 2 | 95.83 |
| 3 | 90.16 |
| 4 | 94.25 |
| 5 | 91.46 |
| 6 | 96.08 |
| 7 | 98.11 |
| 8 | 88.00 |
| 9 | 93.44 |
# sort in ascending order
print("Digit |","Accuracy")
accuracies = pd.DataFrame(opt_res,
index = [i for i in range(0,10)],
columns = ['accuracy'])
# sort asceding
# check values with lowest score
accuracies.sort_values(by='accuracy')
Digit | Accuracy
| accuracy | |
|---|---|
| 8 | 88.00 |
| 3 | 90.16 |
| 5 | 91.46 |
| 9 | 93.44 |
| 4 | 94.25 |
| 2 | 95.83 |
| 6 | 96.08 |
| 1 | 97.37 |
| 7 | 98.11 |
| 0 | 100.00 |
I notice that digits **8**, **3**, **5** and **9** have the worse accuracy, meaning that they have been the most difficult to get identified.
Let's take a closer look on their plots
# store dimensions of shape
dim0_8 = X_test[:, np.where(y_test == 8)].shape[0]
dim2_8 = X_test[:, np.where(y_test == 8)].shape[2]
dim0_8, dim2_8
(256, 50)
# Plot all 8s
fig = plt.figure(figsize = (20, 20))
gs = fig.add_gridspec(nrows = 10, ncols = 5,
hspace=0.4, wspace=0)
axes = gs.subplots(sharex='col', sharey='row')
for item, ax in enumerate(axes.flatten()):
ax.imshow(X_test[:, np.where(y_test == 8)].reshape(dim0_8,
dim2_8)[:, item].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
ax.set_title('Item %d'% (item + 1), fontsize = 12)
plt.show()
Looking at the plots I notice that the quality of the image is often pure and therefore difficult to classify.
# store dimensions of shape
dim0_3 = X_test[:, np.where(y_test == 3)].shape[0]
dim2_3 = X_test[:, np.where(y_test == 3)].shape[2]
dim0_3, dim2_3
(256, 61)
# Plot all 3s
fig = plt.figure(figsize = (20, 20))
gs = fig.add_gridspec(nrows = 8, ncols = 7,
hspace=0.4, wspace=0)
axes = gs.subplots(sharex='col', sharey='row')
for item, ax in enumerate(axes.flatten()):
ax.imshow(X_test[:, np.where(y_test == 3)].reshape(dim0_3,
dim2_3)[:, item].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
ax.set_title('Item %d'% (item + 1), fontsize = 12)
plt.show()
Looking at the plots I notice that the quality of the image is often pure and therefore difficult to classify.
# store dimensions of shape
dim0_5 = X_test[:, np.where(y_test == 5)].shape[0]
dim2_5 = X_test[:, np.where(y_test == 5)].shape[2]
dim0_5, dim2_5
(256, 82)
# Plot all 5s
fig = plt.figure(figsize = (20, 20))
gs = fig.add_gridspec(nrows = 20, ncols = 4,
hspace=0.4, wspace=0)
axes = gs.subplots(sharex='col', sharey='row')
for item, ax in enumerate(axes.flatten()):
ax.imshow(X_test[:, np.where(y_test == 5)].reshape(dim0_5,
dim2_5)[:, item].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
ax.set_title('Item %d'% (item + 1), fontsize = 12)
plt.show()
Looking at the plots I notice that the quality of the image is often pure and therefore difficult to classify.
# store dimensions of shape
dim0_9 = X_test[:, np.where(y_test == 9)].shape[0]
dim2_9 = X_test[:, np.where(y_test == 9)].shape[2]
dim0_9, dim2_9
(256, 61)
# Plot all 9s
fig = plt.figure(figsize = (20, 20))
gs = fig.add_gridspec(nrows = 10, ncols = 6,
hspace=0.4, wspace=0)
axes = gs.subplots(sharex='col', sharey='row')
for item, ax in enumerate(axes.flatten()):
ax.imshow(X_test[:, np.where(y_test == 9)].reshape(dim0_9,
dim2_9)[:, item].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
ax.set_title('Item %d'% (item + 1), fontsize = 12)
plt.show()
Looking at the plots I notice that the quality of the image is often pure and therefore difficult to classify.
But let's also check digits **0** and **1** which have the highest score in regard to their accurate classification.
# store dimensions of shape
dim0_0 = X_test[:, np.where(y_test == 0)].shape[0]
dim2_0 = X_test[:, np.where(y_test == 0)].shape[2]
dim0_0, dim2_0
(256, 111)
# Plot all 0s
fig = plt.figure(figsize = (20, 20))
gs = fig.add_gridspec(nrows = 11, ncols = 10,
hspace=0.4, wspace=0)
axes = gs.subplots(sharex='col', sharey='row')
for item, ax in enumerate(axes.flatten()):
ax.imshow(X_test[:, np.where(y_test == 0)].reshape(dim0_0,
dim2_0)[:, item].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
ax.set_title('Item %d'% (item + 1), fontsize = 12)
plt.show()
Looking at the plots I notice that the quality of the image is excelent and therefore easy for digit 0 to be classified.
This is rather due to the fact that 0 is a simple figure, being only one circle. Someone should try hard in order to write an illegible 0.
# store dimensions of shape
dim0_1 = X_test[:, np.where(y_test == 1)].shape[0]
dim2_1 = X_test[:, np.where(y_test == 1)].shape[2]
dim0_1, dim2_1
(256, 114)
# Plot all 1s
fig = plt.figure(figsize = (20, 20))
gs = fig.add_gridspec(nrows = 11, ncols = 10,
hspace=0.4, wspace=0)
axes = gs.subplots(sharex='col', sharey='row')
for item, ax in enumerate(axes.flatten()):
ax.imshow(X_test[:, np.where(y_test == 1)].reshape(dim0_1,
dim2_1)[:, item].reshape((16,16)),
interpolation='gaussian',
cmap='jet')
ax.set_title('Item %d'% (item + 1), fontsize = 12)
plt.show()
Looking at the plots I notice that the quality of the image is excelent and therefore easy for digit 1 to be classified.
This is rather due to the fact that 1 is a simple figure, with no curves. Someone should try hard in order to write an illegible 1.
I will check the singular values for the different digits. These are the diagonal entries ${\displaystyle \sigma _{i}=\Sigma _{ii}}$ of $ {\displaystyle\mathbf {\Sigma}}$
# Check shapes of the SVDs
for d in range(10):
print('Digit', d)
for comp in range(3):
print(SVD[d][comp].shape)
print("------")
Digit 0 (256, 256) (256,) (567, 567) ------ Digit 1 (256, 256) (256,) (402, 402) ------ Digit 2 (256, 256) (256,) (328, 328) ------ Digit 3 (256, 256) (236,) (236, 236) ------ Digit 4 (256, 256) (235,) (235, 235) ------ Digit 5 (256, 256) (166,) (166, 166) ------ Digit 6 (256, 256) (256,) (269, 269) ------ Digit 7 (256, 256) (256,) (260, 260) ------ Digit 8 (256, 256) (256,) (260, 260) ------ Digit 9 (256, 256) (248,) (248, 248) ------
# extract the singular vales per digit
singular_values = {d:{} for d in range(10)}
max_singular_value = {d:{} for d in range(10)}
for d in range(0, 10):
sing_vals = np.zeros(np.diag(SVD[d][1]).shape[0])
for pos in range(0, np.diag(SVD[d][1]).shape[0]):
sing_vals[pos] = np.diag(SVD[d][1])[pos][pos]
singular_values[d] = sing_vals
max_singular_value[d] = sing_vals.max()
singulars = pd.DataFrame(max_singular_value.values(),
columns = ['Max singular values'])
# sort asceding
# check values with lowest score
singulars.sort_values(by='Max singular values', ascending=False)
| Max singular values | |
|---|---|
| 1 | 294.203154 |
| 0 | 241.555333 |
| 7 | 199.430930 |
| 9 | 192.695883 |
| 6 | 187.702719 |
| 2 | 174.640816 |
| 8 | 173.991941 |
| 4 | 169.436389 |
| 3 | 166.467824 |
| 5 | 127.596879 |
I notice that digits 0 and 1, that count as the best classifiable digits have the highest singular values, while digit 5 which is the worst classifiable digit has the lowest.
I will check the distribution of the singular values.
# plot all singular values per digit
fig = plt.figure(figsize = (10,8))
for digit in range(10):
plt.plot([i for i in range(SVD[digit][1].shape[0])],
SVD[digit][1], label = 'digit %d'%digit)
plt.legend()
plt.show()
I notice that the distribution follows the Zipf's law, where the bigger values are the few one concentrated between 0 and 50 components.
Therefore we will plot again the first 40 basis vectors and check graph again.
fig = plt.figure(figsize = (10,8))
for digit in range(10):
plt.scatter([i for i in range(SVD[digit][1][:40].shape[0])],
SVD[digit][1][:40], s = 4)
plt.plot([i for i in range(SVD[digit][1][:40].shape[0])],
SVD[digit][1][:40], label = 'digit %d'%digit)
plt.legend()
plt.show()
I notice that in most cases the curve drops at around 3 components and becomes almost horizontal around 20 components.
I plot again the first 10 basis vectors and check graph again.
fig = plt.figure(figsize = (10,8))
for digit in range(10):
plt.scatter([i for i in range(SVD[digit][1][:10].shape[0])],
SVD[digit][1][:10], s = 4)
plt.plot([i for i in range(SVD[digit][1][:10].shape[0])],
SVD[digit][1][:10], label = 'digit %d'%digit)
plt.legend()
plt.show()
Ι quantify this behaviour by taking only 1 - 15 basis vector of the SVD and counting the residuals.
I extract the first n columns of the **U** matrix of the calculated SVD. I then calculate the residuals based on the relation $||(I - U_kU_k^T)z ||$ for each digit.
# calculate residuals
# n is the number of basis vectors
residuals = {n:{} for n in range(15)}
for n in progressbar.progressbar(range(0, 15)):
print("Calculating for %d basis vectors." %n)
res_dig = {d:{} for d in range(10)}
for d in range(10):
UUT[d] = SVD[d][0][:, :n] @ SVD[d][0][:, :n].T
X = X_test[:, np.where(y_test == d)[0]]
for image in range(X.shape[1]):
res_dig[d] = np.linalg.norm((np.identity(X.shape[0]) - UUT[d])@X[:, image])
residuals[n] = res_dig
N/A% (0 of 15) | | Elapsed Time: 0:00:00 ETA: --:--:--
Calculating for 0 basis vectors.
13% (2 of 15) |### | Elapsed Time: 0:00:00 ETA: 0:00:02
Calculating for 1 basis vectors. Calculating for 2 basis vectors.
26% (4 of 15) |###### | Elapsed Time: 0:00:00 ETA: 0:00:02
Calculating for 3 basis vectors. Calculating for 4 basis vectors.
40% (6 of 15) |########## | Elapsed Time: 0:00:01 ETA: 0:00:01
Calculating for 5 basis vectors. Calculating for 6 basis vectors.
53% (8 of 15) |############# | Elapsed Time: 0:00:01 ETA: 0:00:01
Calculating for 7 basis vectors. Calculating for 8 basis vectors.
60% (9 of 15) |############### | Elapsed Time: 0:00:01 ETA: 0:00:01
Calculating for 9 basis vectors.
73% (11 of 15) |################# | Elapsed Time: 0:00:02 ETA: 0:00:00
Calculating for 10 basis vectors. Calculating for 11 basis vectors.
86% (13 of 15) |#################### | Elapsed Time: 0:00:02 ETA: 0:00:00
Calculating for 12 basis vectors. Calculating for 13 basis vectors.
100% (15 of 15) |########################| Elapsed Time: 0:00:02 Time: 0:00:02
Calculating for 14 basis vectors.
# Plot residuals for each digit and
# number of basis vectors
fig = plt.figure(figsize = (12, 12))
gs = fig.add_gridspec(nrows = 5, ncols = 2,
hspace=0.3, wspace=0.3)
axes = gs.subplots(sharex='col', sharey='row')
labels = [f"{i} basis vectors" for i in range(15)]
for digit, ax in enumerate(axes.flatten()):
for n in range(15):
ax.scatter(n, residuals[n][digit],
label = '%d bn' % (n+1))
ax.set_title('Digit %d' %digit)
ax.set_xlabel("Basis vectors")
ax.set_ylabel("Residuals")
fig.legend(labels=labels, loc="upper center", ncol=5)
plt.show()
print('Residuals, col: digits, row: basis vectors')
pd.DataFrame(residuals.values())
Residuals, col: digits, row: basis vectors
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 14.614268 | 15.240179 | 14.190426 | 14.818021 | 14.635076 | 14.282437 | 14.847349 | 14.738287 | 13.928814 | 14.853794 |
| 1 | 9.579020 | 4.459765 | 10.836507 | 7.895547 | 11.288203 | 11.041033 | 13.250774 | 10.108007 | 12.128783 | 6.018191 |
| 2 | 9.437465 | 3.185908 | 9.604192 | 7.352325 | 11.116217 | 10.391526 | 13.127189 | 9.098695 | 10.594199 | 5.868412 |
| 3 | 7.198936 | 3.115900 | 9.352826 | 7.256710 | 10.206284 | 9.328444 | 10.170362 | 6.400011 | 10.578604 | 5.750576 |
| 4 | 7.198886 | 1.393883 | 8.179168 | 7.237258 | 8.405224 | 9.178933 | 10.058755 | 6.375557 | 9.435594 | 3.413155 |
| 5 | 5.916823 | 1.302631 | 8.091694 | 7.212134 | 8.334876 | 9.156652 | 9.990679 | 6.244858 | 9.227624 | 3.183356 |
| 6 | 5.891331 | 1.302438 | 7.237459 | 6.965302 | 8.258772 | 8.387947 | 9.488848 | 5.188548 | 9.155742 | 3.150160 |
| 7 | 5.428833 | 1.259245 | 7.232314 | 6.887055 | 7.821954 | 8.379527 | 9.480526 | 5.188537 | 9.151236 | 3.056242 |
| 8 | 5.428828 | 1.213772 | 6.282430 | 6.649834 | 7.817079 | 7.708917 | 9.123631 | 5.106669 | 7.417670 | 2.985929 |
| 9 | 5.192864 | 1.201586 | 5.898572 | 6.598950 | 7.710936 | 6.834759 | 9.030296 | 5.106495 | 7.251322 | 2.904804 |
| 10 | 5.156646 | 1.151628 | 5.874896 | 6.536607 | 7.412355 | 6.834715 | 9.009408 | 5.014937 | 7.242901 | 2.824430 |
| 11 | 5.129022 | 1.148696 | 5.775039 | 6.341843 | 7.257884 | 6.238624 | 9.005408 | 5.014035 | 7.182256 | 2.707755 |
| 12 | 4.739871 | 1.048582 | 5.763843 | 6.214387 | 7.256431 | 6.157942 | 8.640515 | 4.958397 | 5.976871 | 2.702480 |
| 13 | 4.590125 | 1.005512 | 5.723595 | 5.650255 | 7.249309 | 6.131881 | 8.635969 | 4.466860 | 5.860289 | 2.628096 |
| 14 | 4.532944 | 0.949175 | 5.505916 | 5.320848 | 6.622385 | 6.113212 | 8.516577 | 3.987585 | 5.845950 | 2.562616 |
All digits have a high number of residuals when taken 1 to 3 basis vectors. There is a slight differentiation thought, since for example digits 1 drops its residuals down to $< 2$ already after 4 basis components. On the other hand digit 6 remains with a high number of residuals up to the use of 15 basis components.
Therefore, we could consider variation on the number of basis vectors when modeling the classifiacatoin of handwritten digits.
I check how many digits get correctly classified when using only one basis vector. I set a threshold of 70% for the minimum residuals per image and compare. If their ratio is $< 0.7$ then I regard the classification as succesfull, else I proceed using the 10 first basis vectors.
min_residual = []
classify = np.zeros(X_test.shape[1])
success_rate = 0
# for each image in test set
for image in range(X_test.shape[1]):
# initialize array for residuals
residual = np.zeros(10)
# extract image vector
Image = X_test[:,image]
# for each digit
for digit in range(10):
# calculate the residuals for the first basis vector of U matrix
# of the svd
residual[digit] = np.linalg.norm(np.outer(SVD[digit][0][:,:1],
SVD[digit][0][:,:1])@Image -\
Image)
# extract the minimum residuals per digit
one, two = np.sort(residual)[0:2]
min_residual.append(one/two)
# set a threshold, if the difference between
# the two minimum residuals is first equal to
# 70% of the second then
# classification is successful
if one <= 0.70 * two:
classify[image] = np.argmin(residual)
success_rate +=1
else:
for digit in range(10):
# calculate the residuals for the first 18 basis vectors of U matrix
# of the svd
residual[digit] = np.linalg.norm(SVD[digit][0][:,:10]@SVD[digit][0][:,:10].T@Image -\
Image)
classify[image] = np.argmin(residual)
success = confusion_matrix(y_test, classify).trace()/X_test.shape[1]
ratio = success_rate/X_test.shape[1]
print("The accuracy of the classification is calculated at", round(success*100, 3), "%.")
print("\nIn the {} % of the cases the use of one basis vector sufficed \nfor the classification to be accurate.".format(round(ratio*100, 3)))
The accuracy of the classification is calculated at 93.801 %. In the 22.102 % of the cases the use of one basis vector sufficed for the classification to be accurate.
Concluding, the results is satisfactory when using the same number of basis vectors for all digits.